Load and inspect MVP voting data

library(readr)

# Read the MVP voting results; readr guesses the column types.
mvp_voting <- read_csv("Data/mvp_voting.csv")
Rows: 719 Columns: 21
── Column specification ────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr  (3): Rank, Player, Tm
dbl (18): Age, First, Pts Won, Pts Max, Share, G, MP, PTS, TRB, AST, STL, BLK, FG%, 3P%, FT%, WS, WS...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
mvp_voting

Select only columns pertaining to MVP voting results

library(dplyr)

# Keep only the identifying keys (Player, Year) and the voting outcome columns.
mvp_voting <- select(
  mvp_voting,
  Player, Year, `Pts Won`, `Pts Max`, Share
)
mvp_voting

Load and inspect player stats

# Read the per-player season statistics; readr guesses the column types.
player_stats <- read_csv("Data/player_stats.csv")
Rows: 23881 Columns: 31
── Column specification ────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr  (3): Player, Pos, Tm
dbl (28): Rk, Age, G, GS, MP, FG, FGA, FG%, 3P, 3PA, 3P%, 2P, 2PA, 2P%, eFG%, FT, FTA, FT%, ORB, DRB...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
player_stats

Remove Rank column

# Drop the Rk column; every other column is kept as-is.
player_stats <- select(player_stats, -Rk)
player_stats

Remove asterisks after names

library(stringr)

# Strip every literal "*" from player names (fixed() disables regex matching).
player_stats <- player_stats %>%
  mutate(Player = str_replace_all(Player, fixed("*"), ""))
player_stats

Convert NA values for percentages to zeros. This also converts games started to zeros for those predating when that metric began being tracked. I will not use this column for my models, so it should have no impact

# Replace missing values with 0 in the numeric columns only.
# - Character columns (Player, Pos, Tm) are left alone: a numeric 0 is not a
#   valid replacement for a character value.
# - coalesce() comes from dplyr, which is already attached, so this step does
#   not depend on tidyr being loaded.
player_stats <- player_stats %>%
  mutate(across(where(is.numeric), ~ coalesce(.x, 0)))

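Group the dataframe by player and year. Then, for players who appeared for more than one team in a season, keep only the combined "TOT" (total) row and replace its team with the last team listed for that player and season.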

# Collapse the rows of one player-season group when a combined "TOT" (total)
# row is present.
#
# Args:
#   df: data frame holding the rows of a single group; must contain a `Tm`
#       column with the team code of each row.
#
# Returns:
#   `df` unchanged when it has exactly one row or contains no "TOT" row.
#   Otherwise only the "TOT" row(s), with `Tm` overwritten by the team code of
#   the last non-"TOT" row in the group.
handle_multiple_teams <- function(df) {
  if (nrow(df) == 1) {
    return(df)
  }

  # NA team codes never count as the total row.
  is_total <- !is.na(df$Tm) & df$Tm == "TOT"
  if (!any(is_total)) {
    return(df)
  }

  total_rows <- df[is_total, , drop = FALSE]

  # Use the last individual team rather than blindly taking the last row, so
  # the result is still a real team code if the "TOT" row happens to be last.
  # Reading the column as a vector also avoids calling as.character() on a
  # one-cell data frame.
  team_codes <- as.character(df$Tm[!is_total])
  if (length(team_codes) > 0) {
    total_rows$Tm <- team_codes[length(team_codes)]
  }

  total_rows
}
# Make sure Tm is plain character, then run the multi-team helper once per
# (Player, Year) group and drop the grouping afterwards.
player_stats <- player_stats %>%
  mutate(Tm = as.character(Tm)) %>%
  group_by(Player, Year) %>%
  group_modify(~ handle_multiple_teams(.x)) %>%
  ungroup()
player_stats

Merge MVP voting with player stats

# Attach MVP voting results to each player-season, matching on Player and Year.
# The full join keeps rows that appear on only one side. Player-seasons with no
# voting record get 0 for the three voting columns instead of NA.
# coalesce() comes from dplyr, which is already attached, so this step does not
# depend on tidyr being loaded.
player_stats_with_mvp_voting <- player_stats %>%
  full_join(mvp_voting, by = c("Player", "Year")) %>%
  mutate(across(c(`Pts Won`, `Pts Max`, Share), ~ coalesce(.x, 0)))
player_stats_with_mvp_voting

Load and inspect team stats

# Read the per-team season statistics; readr guesses the column types.
team_stats <- read_csv('Data/team_stats.csv')
Rows: 1254 Columns: 9
── Column specification ────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (2): GB, Team
dbl (7): W, L, W/L%, PS/G, PA/G, SRS, Year

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
team_stats

Remove asterisks and seeds from team names

# Normalise team names in three passes:
#   1. drop literal "*" markers,
#   2. drop any parenthesised suffix such as a seed, e.g. "(1)",
#   3. trim and collapse the leftover whitespace.
team_stats <- team_stats %>%
  mutate(
    Team = Team %>%
      str_replace_all(fixed("*"), "") %>%
      str_replace_all("\\([^\\)]+\\)", "") %>%
      str_squish()
  )
team_stats

Change dashes for games back to zeros

# Replace every em dash in GB with "0"; GB is still a character column here.
team_stats$GB <- str_replace_all(team_stats$GB, '—', '0')
team_stats

Convert games back from characters to numeric

# GB now holds only digit strings, so it can be stored as a number.
# Any value that still fails to parse becomes NA with a warning.
team_stats$GB <- as.numeric(team_stats$GB)
team_stats

Load mapping from team abbreviation to full team name

# Build a named list used as a lookup table: names are the first
# comma-separated field of each line (the abbreviation), values are the second
# field (the full name).
lines <- read_lines("Data/abbreviations.csv")

abbreviations <- list()

# lines[-1] skips the first line of the file, which is treated as a header.
for (fields in strsplit(lines[-1], ",")) {
  abbreviations[[fields[1]]] <- fields[2]
}

Add full names to player stats with MVP voting

# Add a Team column by translating each Tm code through the abbreviation
# lookup; `!!!` splices the named list into recode() as old = new pairs.
player_stats_with_mvp_voting <- mutate(
  player_stats_with_mvp_voting,
  Team = recode(Tm, !!!abbreviations)
)
player_stats_with_mvp_voting

Merge player stats and MVP voting with team stats

# Combine player-level rows with team-level rows on (Team, Year). The full join
# keeps rows from either table even when they have no match in the other.
everything <- player_stats_with_mvp_voting %>%
  full_join(team_stats, by = c("Team", "Year"))
everything

Save combined stats to csv

# Write the fully merged table to disk.
write_csv(everything, "Data/combined_stats.csv")
LS0tCnRpdGxlOiAiRGF0YSBDbGVhbmluZyIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKTG9hZCBhbmQgaW5zcGVjdCBNVlAgdm90aW5nIGRhdGEKCmBgYHtyfQpsaWJyYXJ5KHJlYWRyKQoKbXZwX3ZvdGluZyA8LSByZWFkX2NzdignRGF0YS9tdnBfdm90aW5nLmNzdicpCm12cF92b3RpbmcKYGBgCgpTZWxlY3Qgb25seSBjb2x1bW5zIHBlcnRhaW5pbmcgdG8gTVZQIHZvdGluZyByZXN1bHRzCgpgYGB7cn0KbGlicmFyeShkcGx5cikKCm12cF92b3RpbmcgPC0gbXZwX3ZvdGluZyAlPiUgc2VsZWN0KFBsYXllciwgWWVhciwgYFB0cyBXb25gLCBgUHRzIE1heGAsIFNoYXJlKQptdnBfdm90aW5nCmBgYAoKTG9hZCBhbmQgaW5zcGVjdCBwbGF5ZXIgc3RhdHMKCmBgYHtyfQpwbGF5ZXJfc3RhdHMgPC0gcmVhZF9jc3YoJ0RhdGEvcGxheWVyX3N0YXRzLmNzdicpCnBsYXllcl9zdGF0cwpgYGAKClJlbW92ZSBSYW5rIGNvbHVtbgoKYGBge3J9CnBsYXllcl9zdGF0cyA8LSBwbGF5ZXJfc3RhdHMgJT4lIHNlbGVjdCgtUmspCnBsYXllcl9zdGF0cwpgYGAKClJlbW92ZSBhc3Rlcmlza3MgYWZ0ZXIgbmFtZXMKCmBgYHtyfQpsaWJyYXJ5KHN0cmluZ3IpCgpwbGF5ZXJfc3RhdHMkUGxheWVyIDwtIHN0cl9yZXBsYWNlX2FsbChwbGF5ZXJfc3RhdHMkUGxheWVyLCBmaXhlZCgiKiIpLCAiIikKcGxheWVyX3N0YXRzCmBgYAoKQ29udmVydCBOQSB2YWx1ZXMgZm9yIHBlcmNlbnRhZ2VzIHRvIHplcm9zLiBUaGlzIGFsc28gY29udmVydHMgZ2FtZXMgc3RhcnRlZCB0byB6ZXJvcyBmb3IgdGhvc2UgcHJlZGF0aW5nCndoZW4gdGhhdCBtZXRyaWMgYmVnYW4gYmVpbmcgdHJhY2tlZC4gSSB3aWxsIG5vdCB1c2UgdGhpcyBjb2x1bW4gZm9yIG15IG1vZGVscywgc28gaXQgc2hvdWxkIGhhdmUgbm8gaW1wYWN0CgpgYGB7cn0KcGxheWVyX3N0YXRzIDwtIHBsYXllcl9zdGF0cyAlPiUgbXV0YXRlKGFjcm9zcyhldmVyeXRoaW5nKCksIH4gcmVwbGFjZV9uYSgueCwgMCkpKQpgYGAKCkdyb3VwIHRoZSBkYXRhZnJhbWUgYnkgdGhlIGNvbWJpbmVkIHBsYXllciBhbmQgeWVhci4gVGhlbiwgCgpgYGB7cn0KaGFuZGxlX211bHRpcGxlX3RlYW1zIDwtIGZ1bmN0aW9uKGRmKSB7CiAgaWYgKG5yb3coZGYpID09IDEpIHsKICAgIHJldHVybihkZikKICB9CiAgZWxzZSB7CiAgICByb3cgPC0gZGYgJT4lIGZpbHRlcihUbSA9PSAnVE9UJykKICAgIGlmIChucm93KHJvdykgPT0gMCkgewogICAgICByZXR1cm4oZGYpCiAgICB9CiAgICByb3ckVG0gPC0gYXMuY2hhcmFjdGVyKGRmW25yb3coZGYpLCAiVG0iXSkKICAgIHJldHVybihyb3cpCiAgfQp9CnBsYXllcl9zdGF0cyRUbSA8LSBhcy5jaGFyYWN0ZXIocGxheWVyX3N0YXRzJFRtKQpwbGF5ZXJfc3RhdHMgPC0gcGxheWVyX3N0YXRzICU+JSBncm91cF9ieShQbGF5ZXIsIFllYXIpICU+JSBncm91cF9tb2RpZnkofiBoYW5kbGVfbXVsdGlwbGVfdGVhbXMoLngpKQpwbGF5ZXJfc3RhdHMgPC0gcGxheWVyX3N0YXRzICU+JSB1
bmdyb3VwKCkKcGxheWVyX3N0YXRzCmBgYAoKTWVyZ2UgTVZQIHZvdGluZyB3aXRoIHBsYXllciBzdGF0cwoKYGBge3J9CnBsYXllcl9zdGF0c193aXRoX212cF92b3RpbmcgPC0gZnVsbF9qb2luKHBsYXllcl9zdGF0cywgbXZwX3ZvdGluZywgYnkgPSBjKCJQbGF5ZXIiID0gIlBsYXllciIsICJZZWFyIiA9ICJZZWFyIikpICU+JSBtdXRhdGUoCiAgYFB0cyBXb25gID0gcmVwbGFjZV9uYShgUHRzIFdvbmAsIDApLAogIGBQdHMgTWF4YCA9IHJlcGxhY2VfbmEoYFB0cyBNYXhgLCAwKSwKICBTaGFyZSA9IHJlcGxhY2VfbmEoU2hhcmUsIDApCikKcGxheWVyX3N0YXRzX3dpdGhfbXZwX3ZvdGluZwpgYGAKCkxvYWQgYW5kIGluc3BlY3QgdGVhbSBzdGF0cwoKYGBge3J9CnRlYW1fc3RhdHMgPSByZWFkX2NzdignRGF0YS90ZWFtX3N0YXRzLmNzdicpCnRlYW1fc3RhdHMKYGBgCgpSZW1vdmUgYXN0ZXJpc2tzIGFuZCBzZWVkcyBmcm9tIHRlYW0gbmFtZXMKCmBgYHtyfQp0ZWFtX3N0YXRzJFRlYW0gPC0gc3RyX3JlcGxhY2VfYWxsKHRlYW1fc3RhdHMkVGVhbSwgZml4ZWQoIioiKSwgIiIpCnRlYW1fc3RhdHMkVGVhbSA8LSBzdHJfcmVwbGFjZV9hbGwodGVhbV9zdGF0cyRUZWFtLCAiXFwoW15cXCldK1xcKSIsICIiKQp0ZWFtX3N0YXRzJFRlYW0gPC0gc3RyX3NxdWlzaCh0ZWFtX3N0YXRzJFRlYW0pCnRlYW1fc3RhdHMKYGBgCgpDaGFuZ2UgZGFzaGVzIGZvciBnYW1lcyBiYWNrIHRvIHplcm9zCgpgYGB7cn0KdGVhbV9zdGF0cyA8LSB0ZWFtX3N0YXRzICU+JSBtdXRhdGUoR0IgPSBzdHJfcmVwbGFjZV9hbGwoR0IsICfigJQnLCAnMCcpKQp0ZWFtX3N0YXRzCmBgYAoKQ29udmVydCBnYW1lcyBiYWNrIGZyb20gY2hhcmFjdGVycyB0byBudW1lcmljCgpgYGB7cn0KdGVhbV9zdGF0cyA8LSB0ZWFtX3N0YXRzICU+JSBtdXRhdGUoR0IgPSBhcy5udW1lcmljKEdCKSkKdGVhbV9zdGF0cwpgYGAKCkxvYWQgbWFwcGluZyBmcm9tIGZ1bGwgbmFtZSB0byBhYmJyZXZpYXRpb24KCmBgYHtyfQphYmJyZXZpYXRpb25zIDwtIGxpc3QoKQoKbGluZXMgPC0gcmVhZF9saW5lcygiRGF0YS9hYmJyZXZpYXRpb25zLmNzdiIpCgpmb3IgKGxpbmUgaW4gbGluZXNbLTFdKSB7CiAgc3BsaXRfbGluZSA8LSBzdHJzcGxpdChsaW5lLCAiLCIpW1sxXV0KICBhYmJyZXZpYXRpb24gPC0gc3BsaXRfbGluZVsxXQogIG5hbWUgPC0gc3BsaXRfbGluZVsyXQogIAogIGFiYnJldmlhdGlvbnNbW2FiYnJldmlhdGlvbl1dIDwtIG5hbWUKfQpgYGAKCkFkZCBmdWxsIG5hbWVzIHRvIHBsYXllciBzdGF0cyB3aXRoIE1WUCB2b3RpbmcKCmBgYHtyfQpwbGF5ZXJfc3RhdHNfd2l0aF9tdnBfdm90aW5nIDwtIHBsYXllcl9zdGF0c193aXRoX212cF92b3RpbmcgJT4lIG11dGF0ZShUZWFtID0gcmVjb2RlKFRtLCAhISFhYmJyZXZpYXRpb25zKSkKcGxheWVyX3N0YXRzX3dpdGhfbXZwX3ZvdGluZwpgYGAKCk1lcmdlIHBsYXllciBzdGF0cyB3aXRoIE1QViB2b3Rpbmcgd2l0aCB0ZWFtIHN0
YXRzCgpgYGB7cn0KZXZlcnl0aGluZyA8LSBmdWxsX2pvaW4ocGxheWVyX3N0YXRzX3dpdGhfbXZwX3ZvdGluZywgdGVhbV9zdGF0cywgYnkgPSBjKCJUZWFtIiA9ICJUZWFtIiwgIlllYXIiID0gIlllYXIiKSkKZXZlcnl0aGluZwpgYGAKClNhdmUgY29tYmluZWQgc3RhdHMgdG8gY3N2CgpgYGB7cn0Kd3JpdGVfY3N2KGV2ZXJ5dGhpbmcsICdEYXRhL2NvbWJpbmVkX3N0YXRzLmNzdicpCmBgYAoK